import requests
from lxml import etree
import lxml.html
import urllib.request
import ssl

# NOTE(review): globally disables HTTPS certificate verification for
# urllib.request — a security risk. Presumably a workaround for a broken
# local certificate store; requests (used below) is NOT affected by this
# patch, so confirm it is still needed at all.
ssl._create_default_https_context = ssl._create_unverified_context

def downlourl(volume, issue):
    """Return the SAGE table-of-contents URL for one Stata Journal issue."""
    base = "https://journals.sagepub.com/toc/stja"
    return "%s/%s/%s" % (base, volume, issue)

# Browser-like request headers so the publisher serves the normal HTML page.
headers = {
    "Connection": "keep-alive",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) "
        "Gecko/20100101 Firefox/66.0"
    ),
}

def getlists(volume, issue):
    """Scrape one issue's table of contents and append one markdown
    bibliography line per article to the module-global file handle ``f``.

    Parameters
    ----------
    volume, issue : int
        Stata Journal volume and issue numbers.

    Raises
    ------
    IndexError
        When the page lacks the expected elements (e.g. the issue does
        not exist); the caller treats any exception as "issue unavailable".
    """
    r = requests.get(downlourl(str(volume), str(issue)), headers=headers)
    page = etree.HTML(r.content)
    # xpath() already returns lists; no wrapping comprehension needed.
    title = page.xpath('//span[@class="hlFld-Title"]/text()')
    pdfaddress = page.xpath('//a[@data-item-name="download-PDF"]/@href')
    pagenum = page.xpath('//span[@class="articlePageRange"]/text()')
    volumeinfo = page.xpath('//div[@class="journalNavTitle"]/text()')
    authors = page.xpath('//span[@class="articleEntryAuthorsLinks"]')
    print("This is %s" % volumeinfo[0].strip())
    f.write("\n" + "**this is %s**" % volumeinfo[0].strip() + "\n")
    # Stata Journal volume N was published in year 2000+N (vol 1 = 2001).
    year = "20%02d" % volume
    for i, art_title in enumerate(title):
        authorlist = []
        try:
            raw = authors[i].iterfind(
                './/span[@class="contribDegrees"]/a[@class="entryAuthor"]'
            )
            for node in raw:
                authorlist.append(node.text)
        except Exception:
            # Author span missing or malformed for this article — use a
            # visible placeholder instead of dropping the entry.
            authorlist.append("——")
        # Page ranges come as e.g. "pp. 1-15"; keep only the numbers.
        pagerange = pagenum[i].split(".")[-1]
        link = '[pdf](https://journals.sagepub.com%s)' % (pdfaddress[i])
        f.write(
            ", ".join(
                authorlist
                + [year, art_title, 'Stata Journal',
                   "%s(%s):%s. " % (volume, issue, pagerange)]
            )
            + link + "\n"
        )

if __name__=="__main__":
    f = open("sjlist.md",'a')
    for j in range(1,20):
        for k in range(1,5):
            try:
                getlists(j,k)
            except:
                print("Need reload volume %s issue %s"%(j,k))
    f.close()